import html
import json
import re
import time

import requests

# Pages FIRST_PAGE..LAST_PAGE (inclusive) are requested; the crawl stops early
# at the first page that contains no quotes.
FIRST_PAGE = 1
LAST_PAGE = 11
PAGE_URL = 'https://quotes.toscrape.com/page/{}/'
OUTPUT_PATH = "output.json"
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# Compiled once instead of on every page.
QUOTE_RE = re.compile(r'<span class="text" itemprop="text">(.*?)</span>')
AUTHOR_RE = re.compile(
    r'<span>by <small class="author" itemprop="author">(.*?)</small>'
)


def parse_quotes(page_html):
    """Return the ``(quote_text, author)`` pairs found in one page of HTML.

    Quote texts and author names are matched independently and paired in
    document order. HTML entities (e.g. ``&#39;``) are decoded so the stored
    text is the readable form. If the two patterns match a different number
    of times, the surplus matches are dropped instead of raising IndexError.
    """
    texts = QUOTE_RE.findall(page_html)
    authors = AUTHOR_RE.findall(page_html)
    return [
        (html.unescape(text), html.unescape(author))
        for text, author in zip(texts, authors)
    ]


def add_quotes(quotes_by_author, pairs):
    """Append each quote in *pairs* to its author's list and return the dict.

    Every author maps to a list of quotes, so an author with several quotes
    keeps all of them (a plain ``dict[author] = quote`` assignment would keep
    only the last one).
    """
    for text, author in pairs:
        quotes_by_author.setdefault(author, []).append(text)
    return quotes_by_author


def save_quotes(quotes_by_author, path=OUTPUT_PATH):
    """Write the collected quotes to *path* as UTF-8, human-readable JSON."""
    with open(path, 'w', encoding='utf-8') as json_file:
        json.dump(quotes_by_author, json_file, ensure_ascii=False, indent=4)


def main():
    """Crawl the quote pages in order and store the quotes grouped by author."""
    quotes_by_author = {}
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        print('-----------')
        print(f'正在爬取第{page}页内容')
        try:
            response = requests.get(
                PAGE_URL.format(page), timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            # Stop cleanly on a network failure; pages already crawled have
            # been saved by the per-page write below.
            print(f'第{page}页请求失败: {exc}')
            break

        pairs = parse_quotes(response.text)
        if not pairs:
            print(f'第{page}页内容不存在')
            break

        for text, author in pairs:
            print(f'{text},by {author}')
        add_quotes(quotes_by_author, pairs)
        # Persist once per page (not once per quote) so progress survives a
        # later failure without rewriting the file for every single quote.
        save_quotes(quotes_by_author)

        print('-----------')
        print(f'爬取第{page}页内容完成')
        time.sleep(1)  # be polite to the server between page requests
    print('进程结束')


if __name__ == "__main__":
    main()